The PGC3 MDD study is including the following analyses to identify genes associated with MDD:
Show code
##########
# Nearest gene
##########
library(data.table)
# Read in GWAS results
# Currently we are using results only for lead indepdendant associations from COJO
# I think this is fine
indep_hits<-fread('../../docs/tables/meta_snps_full_eur.cojo.txt')
# Link SNPs to nearest gene
# Insert nearest gene information
library(biomaRt)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
library(GenomicRanges)
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:data.table':
##
## first, second
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:data.table':
##
## shift
## Loading required package: GenomeInfoDb
gene_attributes = c('ensembl_gene_id', 'hgnc_symbol', 'external_gene_name','chromosome_name','start_position','end_position')
# GRCh37 for position
ensembl37 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl", GRCh=37)
Genes37<-getBM(attributes=gene_attributes, mart = ensembl37)
# remove alternative contigs
Genes37 <- Genes37[Genes37$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes37$cpid <- with(Genes37, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes37 <- Genes37[!duplicated(Genes37$ensembl_gene_id),]
Genes37 <- Genes37[!duplicated(Genes37$cpid),]
Genes37 <- Genes37[!duplicated(Genes37$external_gene_name),]
# GRCH38 for gene names
ensembl38 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl")
Genes38<-getBM(attributes=gene_attributes, mart = ensembl38)
# remove alternative contigs
Genes38 <- Genes38[Genes38$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes38$cpid <- with(Genes38, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes38 <- Genes38[!duplicated(Genes38$ensembl_gene_id),]
Genes38 <- Genes38[!duplicated(Genes38$cpid),]
Genes38 <- Genes38[!duplicated(Genes38$external_gene_name),]
# 37 positions with 38 gene names
Genes <- merge(Genes37, Genes38[,c('ensembl_gene_id', 'chromosome_name', 'external_gene_name', 'hgnc_symbol')], by=c('ensembl_gene_id', 'chromosome_name'), all.x=TRUE, suffix=c('.37', '.38'))
# copy over build 37 gene name if it is missing in 38
coalesce_gene_name <- which(is.na(Genes$external_gene_name.38))
Genes$external_gene_name = with(Genes, ifelse(is.na(external_gene_name.38) | external_gene_name.38 == "", yes=external_gene_name.37, no=external_gene_name.38))
window<-50000
for(i in 1:nrow(indep_hits)){
Genes_i<-Genes[Genes$start_position < (indep_hits$BP[i] + window) & Genes$end_position > (indep_hits$BP[i] - window) & Genes$chromosome_name == indep_hits$CHR[i],]
if(nrow(Genes_i) != 0){
gene_string<-NULL
for(j in 1:nrow(Genes_i)){
if(indep_hits$BP[i] > Genes_i$start_position[j] & indep_hits$BP[i] < Genes_i$end_position[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
ENSGID=Genes_i$ensembl_gene_id[j],
Dist=0,
Pos=NA))
}
if(indep_hits$BP[i] < Genes_i$start_position[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
ENSGID=Genes_i$ensembl_gene_id[j],
Dist=abs(indep_hits$BP[i] - Genes_i$start_position[j]),
Pos='-'))
}
if(indep_hits$BP[i] > Genes_i$end_position[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
ENSGID=Genes_i$ensembl_gene_id[j],
Dist=abs(indep_hits$BP[i] - Genes_i$end_position[j]),
Pos='+'))
}
}
gene_string<-gene_string[order(gene_string$Dist),]
indep_hits$NearestGene[i]<-as.character(gene_string$ID[1])
indep_hits$NearestENSG[i]<-as.character(gene_string$ENSGID[1])
} else {
indep_hits$NearestGene[i]<-NA
indep_hits$NearestENSG[i]<-NA
}
}
##########
# SNP-finemapping
##########
# Read in finemapping results from Joni. This table shows genes implicated by the finemapping results, by a gene containing the entire 95% credible set.
finemap<-fread('../../docs/tables/finemapping/Locus_FineMapping_Full_Results.csv')
# parse out gene names
finemap_genes<-unlist(strsplit(finemap$High.Confidence.Genes.Names, ','))
finemap_genes<-finemap_genes[finemap_genes != '-']
# parse out ensembl ids
finemap_geneids<-unlist(strsplit(finemap$High.Confidence.Genes.IDs, ','))
finemap_geneids<-finemap_geneids[finemap_geneids != '-']
finemap_geneids <- sapply(strsplit(finemap_geneids, '\\.'), function(g) g[[1]])
##########
# FastBAT
##########
# Read in FastBAT results
fastbat<-fread('../../docs/tables/fastBAT/mdd_fastbat_AllgeneMatrix.gene.fastbat')
fastbat$P.FDR<-p.adjust(fastbat$Pvalue, method='fdr')
##########
# H-MAGMA
##########
hmagma<-fread('../../docs/tables/H-MAGMA/PGC_MDD_Results_mar2022.csv')
# Exclude astrocytes
hmagma_noAstr<-hmagma[hmagma$Analysis != 'Astrocytes',]
# Apply FDR correction across all tests
hmagma_noAstr$P.FDR<-p.adjust(hmagma_noAstr$P, method = 'fdr')
hmagma_noAstr<-merge(hmagma_noAstr, Genes, by.x='GENE', by.y='ensembl_gene_id')
##########
# TWAS
##########
twas<-fread('../../docs/tables/twas/PGC_MDD3_twas_AllTissues_GW.txt')
twas$chromosome_name <- as.character(twas$CHR)
twas$twas_id <- 1:nrow(twas)
# merge by scaffold (exact overlap)
twas_gr <- with(twas, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
genes_gr <- with(Genes, GRanges(seqnames=chromosome_name, ranges=IRanges(start=start_position, end=end_position)))
twas_genes_overlaps <- findOverlaps(twas_gr, genes_gr, type='equal')
twas_scaffolds <- twas[twas_genes_overlaps@from,]
twas_scaffolds$ensembl_gene_id <- Genes$ensembl_gene_id[twas_genes_overlaps@to]
twas_scaffolds$external_gene_name.37 <- Genes$external_gene_name.37[twas_genes_overlaps@to]
# merge unmatched twas results by symbol and partial overlap
twas_unmatched <- twas[!twas$twas_id %in% twas_scaffolds$twas_id,]
twas_unmatched_gr <- with(twas_unmatched, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
# find overlaps
twas_unmatched_genes_overlaps <- findOverlaps(twas_unmatched_gr, genes_gr, maxgap=window)
# merge in gene names
twas_symbols <- twas_unmatched[twas_unmatched_genes_overlaps@from,]
twas_symbols$ensembl_gene_id <- Genes$ensembl_gene_id[twas_unmatched_genes_overlaps@to]
twas_symbols$external_gene_name.37 <- Genes$external_gene_name.37[twas_unmatched_genes_overlaps@to]
# keep matches with symbols match
twas_matched_symbols <- twas_symbols[which(twas_symbols$ID == twas_symbols$external_gene_name.37),]
twas_genes <- rbind(twas_scaffolds, twas_matched_symbols)
twas_sig<-twas_genes[twas_genes$TWAS.P < 3.685926e-08,]
##########
# Colocalisation
##########
coloc<-read.csv('../../docs/tables/twas/PGC_MDD3_TWAS_colocalisation.csv')
coloc_sig<-coloc[coloc$COLOC.PP4 > 0.8,]
coloc_sig <- merge(coloc_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# FOCUS
##########
focus<-read.csv('../../docs/tables/twas/PGC_MDD3_TWAS.TWSig.FOCUS.results.csv')
fusion <- fread("../../docs/tables/twas/PGC_MDD3_twas_AllTissues_TWSig_CLEAN.txt")
fusion<-fusion[,c('PANEL','PANEL_clean_short','PANEL_clean'), with=F]
fusion<-fusion[!duplicated(fusion),]
focus<-merge(focus, fusion, by.x='SNP.weight.Set', by.y='PANEL_clean_short')
focus_sig<-focus[focus$FOCUS_pip > 0.5,]
focus_sig <- merge(focus_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# Expression analysis based high confidence genes
##########
expression_highconf_res<-fread('../../docs/tables/twas/PGC3_MDD_TWAS_HighConf_results.csv')
expression_highconf_res <- merge(expression_highconf_res, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# SMR
##########
# Read in the SMR results
smr_res<-list()
smr_res[['eQTLGen_Blood']]<-fread('../../docs/tables/twas/eqtlgen_smr_res_GW_withIDs.csv')
names(smr_res[['eQTLGen_Blood']])[names(smr_res[['eQTLGen_Blood']]) == 'GeneSymbol']<-'HGNCName'
smr_res[['eQTLGen_Blood']]$eQTL_source<-'eQTLGen_Blood'
tissue<-c("Basalganglia","Cerebellum","Cortex","Hippocampus","Spinalcord")
for(tissue_i in tissue){
smr_res[[tissue_i]]<-fread(paste0('../../docs/tables/twas/metabrain_',tissue_i,'_smr_res_GW_withIDs.csv'))
smr_res[[tissue_i]]$eQTL_source<-paste0('MetaBrain_',tissue_i)
}
smr_res_dat<-do.call(rbind, smr_res)
smr_res_dat$p_SMR_fdr_all<-p.adjust(smr_res_dat$p_SMR, method="fdr")
smr_res_dat_sig<-smr_res_dat[smr_res_dat$p_SMR_fdr_all < 0.05,]
##########
# HEIDI
##########
heidi<-smr_res_dat_sig[smr_res_dat_sig$p_HEIDI > 0.05,]
##########
# PWAS
##########
# For no just read in the ROSMAP results
pwas<-NULL
for(i in 1:22){
if(i != 6){
pwas<-rbind(pwas, fread(paste0('../../docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i)))
} else {
pwas<-rbind(pwas, fread(paste0('../../docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i)))
pwas<-rbind(pwas, fread(paste0('../../docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i,'.MHC')))
}
}
pwas$TWAS.P.FDR<-p.adjust(pwas$TWAS.P)
pwas$ID<-gsub('\\..*','', pwas$ID)
# Read in PWAS and SMR results for all significant ROSMAP PWAS assoc results
pwas_smr_rosmap_banner<-fread('../../docs/tables/pwas/rosmap_banner_pwas_smr_results.csv')
pwas_smr_rosmap_banner <- merge(pwas_smr_rosmap_banner, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
########
# PsyOPS
########
psyops <- fread('../../docs/tables/psyops/psyops_full_eur.cojo.txt')
psyops$psy_id <- 1:nrow(psyops)
psyops_genes37 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='gene', by.y='external_gene_name.37')
psyops_genes38 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.38')], by.x='gene', by.y='external_gene_name.38')
psyops_genesen <- merge(psyops, Genes[,c('ensembl_gene_id','external_gene_name')], by.x='gene', by.y='ensembl_gene_id')
psyops_genesen$ensembl_gene_id <- psyops_genesen$gene
psyops_genesen$external_gene_name <- NULL
psyops_genes <- unique(rbind(psyops_genes37, psyops_genes38, psyops_genesen))
psyops_genes <- psyops_genes[!duplicated(psyops_genes$psy_id),]
psyops_prioritised<-NULL
for(i in unique(psyops_genes$lead_variant)){
tmp<-psyops_genes[psyops_genes$lead_variant == i,]
tmp<-tmp[tmp$PsyOPS_score == max(tmp$PsyOPS_score),]
tmp<-tmp[tmp$distance == min(tmp$distance),]
psyops_prioritised<-rbind(psyops_prioritised, tmp)
}
This plot will show the number of genes returned by each analysis and the overlap of each analysis
Show code
# Create data.frame listing genes with T/F indicating whether it was supported by each analysis
gene_overlap<-list()
gene_overlap[['Fine-mapping']]<-finemap_geneids
gene_overlap[['Expression']]<-expression_highconf_res$ensembl_gene_id
gene_overlap[['Protein']]<-pwas_smr_rosmap_banner$ensembl_gene_id[which(pwas_smr_rosmap_banner$banner_replicated & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)]
gene_overlap[['Nearest Gene']]<-indep_hits$NearestENSG[!is.na(indep_hits$NearestENSG)]
gene_overlap[['fastBAT']]<-fastbat$Gene[fastbat$P.FDR < 0.05]
gene_overlap[['H-MAGMA']]<-unique(hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 0.05])
gene_overlap[['PsyOPS']]<-psyops_prioritised$ensembl_gene_id
library(UpSetR)
png('../../docs/figures/gene_consensus_upset_dense.png', units = 'px', res=300, height=1500, width=2500)
upset(fromList(gene_overlap), nsets=10, order.by = "freq")
dev.off()
## png
## 2
gene_overlap_highconf <- gene_overlap[c('Fine-mapping','Expression', 'Protein', 'PsyOPS')]
png('../../docs/figures/gene_consensus_upset_highconf.png', units = 'px', res=300, height=1400, width=1500)
upset(fromList(gene_overlap_highconf), nsets=10, order.by = "freq")
dev.off()
## png
## 2
Show UpSet plot of genes across all analyses
High-confidence genes
Show UpSet plot of genes across high-confidence analyses
High-confidence genes
Show code
# Define high confidence associations
# - Genes implicated by finemapping
# - Genes implicated by TWAS, colocalisation and FOCUS from any panel
# - Genes implicated by PWAS, SMR, coloc and HEIDI in ROSMAP and Banner
high_conf<-unique(unlist(gene_overlap[c('Fine-mapping','Expression', 'Protein')]))
ENSGIDs <- Genes[,c('ensembl_gene_id', 'external_gene_name')]
names(ENSGIDs) <- c('ENSGID', 'ID')
high_conf_tab <- merge(data.frame(ENSGID=high_conf), ENSGIDs)
# finemap
# Joni said he used the same gene list as David Howard (fastBAT)
high_conf_tab$finemap<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% fastbat$Gene | high_conf_tab$ENSGID[i] %in% finemap_geneids){
if(high_conf_tab$ENSGID[i] %in% finemap_geneids){
high_conf_tab$finemap[i]<-'Sig'
} else {
high_conf_tab$finemap[i]<-'Non-Sig'
}
}
}
# expression
high_conf_tab$expression<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(!(high_conf_tab$ID[i] %in% twas$ID)){
high_conf_tab$expression[i]<-'NA'
} else {
if(!(high_conf_tab$ID[i] %in% expression_highconf_res$ID)){
high_conf_tab$expression[i]<-'Non-Sig'
} else {
if(expression_highconf_res$`SNP-weight Set`[expression_highconf_res$ID == high_conf_tab$ID[i]] == "CMC DLPFC Splicing"){
high_conf_tab$expression[i]<-'Sig'
} else {
if(expression_highconf_res$TWAS.Z[expression_highconf_res$ID == high_conf_tab$ID[i]] > 0){
high_conf_tab$expression[i]<-'Pos'
} else {
high_conf_tab$expression[i]<-'Neg'
}
}
}
}
}
# protein
high_conf_tab$protein<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(!(high_conf_tab$ENSGID[i] %in% pwas$ID)){
high_conf_tab$protein[i]<-'NA'
} else {
if(!(high_conf_tab$ID[i] %in% pwas_smr_rosmap_banner$ID[which(pwas_smr_rosmap_banner$banner_replicated & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)])){
high_conf_tab$protein[i]<-'Non-Sig'
} else {
if(pwas_smr_rosmap_banner$rosmap.TWAS.Z[pwas_smr_rosmap_banner$ID == high_conf_tab$ID[i]] > 0){
high_conf_tab$protein[i]<-'Pos'
} else {
high_conf_tab$protein[i]<-'Neg'
}
}
}
}
# fastBAT
high_conf_tab$fastBAT<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% fastbat$Gene){
if(high_conf_tab$ENSGID[i] %in% fastbat$Gene[fastbat$P.FDR < 0.05]){
high_conf_tab$fastBAT[i]<-'Sig'
} else {
high_conf_tab$fastBAT[i]<-'Non-Sig'
}
}
}
# hmagma
high_conf_tab$hmagma<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE){
if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 0.05]){
high_conf_tab$hmagma[i]<-'Sig'
} else {
high_conf_tab$hmagma[i]<-'Non-Sig'
}
}
}
# PsyOPS
high_conf_tab$psyops<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% psyops_genes$ensembl_gene_id){
if(high_conf_tab$ENSGID[i] %in% psyops_prioritised$ensembl_gene_id){
high_conf_tab$psyops[i]<-'Sig'
} else {
high_conf_tab$psyops[i]<-'Non-Sig'
}
}
}
# Order genes by the number of analyses indicating them as high confidence.
high_conf_tab_log<-high_conf_tab[,c(-1, -2)]
high_conf_tab_log[high_conf_tab_log == 'NA']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Sig']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Non-Sig']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Pos']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Neg']<-'T'
high_conf_tab_log<-data.frame(sapply( high_conf_tab_log, as.logical))
high_conf_tab_log[is.na(high_conf_tab_log)]<-T
high_conf_tab_log$sum<-rowSums(high_conf_tab_log[,1:3])
high_conf_tab_ordered <-high_conf_tab[order(-high_conf_tab_log$sum, high_conf_tab$ID),-1]
high_conf_tab_ordered$ID<-factor(high_conf_tab_ordered$ID, levels = high_conf_tab_ordered$ID)
names(high_conf_tab_ordered)<-c('ID','Fine-mapping','Expression','Protein','fastBAT','H-MAGMA','PsyOPS')
high_conf_tab_melt<-melt(as.data.table(high_conf_tab_ordered), id.vars='ID')
high_conf_tab_melt$value<-factor(high_conf_tab_melt$value, levels=c('Non-Sig','Sig','Pos','Neg','NA'))
high_conf_tab_melt$ID<-factor(high_conf_tab_melt$ID, levels=rev(levels(high_conf_tab_ordered$ID)))
library(ggplot2)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
png('../../docs/figures/gene_con_heatmap.png', units = 'px', res=300, height=14000, width=2800)
ggplot(data = high_conf_tab_melt, aes(x='1', y = ID, fill=value)) +
theme_minimal_grid(color = "white") +
geom_tile(colour = 'black', width=1.5) +
scale_fill_manual(values=c('#FFFFFF','#33FF33','#FF6666','#3399FF','#CCCCCC'), drop=F) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
labs(x ='', y = "", title='', fill='') +
facet_grid(~ variable) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
strip.background = element_rect(color="black", fill="grey95", size=0.1, linetype="solid"))
dev.off()
## png
## 2
Show heatmap of high confidence associations
High-confidence genes
This will give some indication of how fine-mapped variants may affect gene expression and protein levels, and may also give clarity for associations that have a different direction of effect in the TWAS and PWAS (which is still the case for the GOPC gene).
Show code
###
# Create a dataframe containing gene ID, PANEL, Method and Z scores for all expression and protein analyses.
###
all_func_res<-NULL
# TWAS
twas_tmp<-twas[,c('PANEL','ID','TWAS.Z'), with=F]
twas_tmp$Sig<-twas$TWAS.P < 3.685926e-08
twas_tmp$Coloc<-twas$COLOC.PP4 > 0.8
names(twas_tmp)<-c('Panel','ID','Z','Sig','Coloc')
twas_tmp$Method<-'FUSION'
twas_tmp$Type<-'Expr.'
twas_tmp$Type[grepl('SPLIC',twas_tmp$Panel)]<-'Splice'
# Retain only the most significant assoc for each gene within PANEL (only relevent for splice panel)
twas_tmp<-twas_tmp[order(-abs(twas_tmp$Z)),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$Tissue<-NA
twas_tmp$Tissue[!grepl('Adrenal|BLOOD|Blood|Thyroid',twas_tmp$Panel)]<-'Brain'
twas_tmp$Tissue[grepl('BLOOD|Blood',twas_tmp$Panel)]<-'Blood'
twas_tmp$Tissue[grepl('Adrenal|Thyroid',twas_tmp$Panel)]<-'HPA/HPT'
twas_tmp<-merge(twas_tmp, focus_sig[,c('ID','PANEL','FOCUS_pip')], by.x=c('Panel','ID'), by.y=c('PANEL','ID'), all.x=T)
twas_tmp$FOCUS[twas_tmp$FOCUS_pip > 0.5]<-T
twas_tmp$FOCUS[twas_tmp$FOCUS_pip < 0.5 | is.na(twas_tmp$FOCUS_pip)]<-F
twas_tmp<-twas_tmp[order(-twas_tmp$FOCUS_pip),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$FOCUS_pip<-NULL
all_func_res<-rbind(all_func_res, twas_tmp)
# SMR expression
smr_res_dat$Z<-smr_res_dat$b_SMR/smr_res_dat$se_SMR
smr_res_dat$Sig<-smr_res_dat$p_SMR_fdr_all < 0.05
smr_res_dat$Coloc<-smr_res_dat$p_HEIDI > 0.05
smr_tmp<-smr_res_dat[,c('eQTL_source','HGNCName','Z','Sig','Coloc'), with=F]
names(smr_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_tmp$Method<-'SMR'
smr_tmp$Type<-'Expr.'
smr_tmp$Tissue<-NA
smr_tmp$Tissue[!grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Brain'
smr_tmp$Tissue[grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Blood'
smr_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, smr_tmp)
# PWAS
pwas_smr_rosmap_banner$rosmap.sig<-T
pwas_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
pwas_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
pwas_rosmap_tmp<-pwas_rosmap_tmp[,c('PANEL','ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
names(pwas_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_rosmap_tmp<-pwas_rosmap_tmp[order(-abs(pwas_rosmap_tmp$Z)),]
pwas_rosmap_tmp<-pwas_rosmap_tmp[!duplicated(paste0(pwas_rosmap_tmp$Panel, pwas_rosmap_tmp$ID)),]
pwas_rosmap_tmp$Method<-'FUSION'
pwas_rosmap_tmp$Type<-'Protein'
pwas_rosmap_tmp$Tissue<-'Brain'
pwas_rosmap_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, pwas_rosmap_tmp)
pwas_banner_tmp<-pwas_smr_rosmap_banner[,c('ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
pwas_banner_tmp$PANEL<-'Banner et al. DLPFC'
pwas_banner_tmp<-pwas_banner_tmp[,c('PANEL','ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
names(pwas_banner_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_banner_tmp<-pwas_banner_tmp[order(-abs(pwas_banner_tmp$Z)),]
pwas_banner_tmp<-pwas_banner_tmp[!duplicated(paste0(pwas_banner_tmp$Panel, pwas_banner_tmp$ID)),]
pwas_banner_tmp$Method<-'FUSION'
pwas_banner_tmp$Type<-'Protein'
pwas_banner_tmp$Tissue<-'Brain'
pwas_banner_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, pwas_banner_tmp)
# SMR protein
pwas_smr_rosmap_banner$z_SMR<-abs(qnorm(pwas_smr_rosmap_banner$p_SMR/2))
pwas_smr_rosmap_banner$z_SMR<-sign(pwas_smr_rosmap_banner$b_SMR)*pwas_smr_rosmap_banner$z_SMR
smr_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','z_SMR','smr_replicated','smr.causal'), with=F]
smr_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
smr_rosmap_tmp<-smr_rosmap_tmp[,c('PANEL','ID','z_SMR','smr_replicated','smr.causal'), with=F]
names(smr_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_rosmap_tmp<-smr_rosmap_tmp[order(-abs(smr_rosmap_tmp$Z)),]
smr_rosmap_tmp<-smr_rosmap_tmp[!duplicated(paste0(smr_rosmap_tmp$Panel, smr_rosmap_tmp$ID)),]
smr_rosmap_tmp$Method<-'SMR'
smr_rosmap_tmp$Type<-'Protein'
smr_rosmap_tmp$Tissue<-'Brain'
smr_rosmap_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, smr_rosmap_tmp)
# Subset to high confidence genes
high_conf_id<-Genes$external_gene_name[Genes$ensembl_gene_id %in% high_conf]
all_func_res<-all_func_res[all_func_res$ID %in% high_conf_id,]
# Remove missing values
all_func_res<-all_func_res[complete.cases(all_func_res),]
all_func_res$Group<-paste0(all_func_res$Tissue,'\n',all_func_res$Method,'\n',all_func_res$Type )
all_func_res$Group<-factor(all_func_res$Group, levels=c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr."))
all_func_res$ID<-factor(all_func_res$ID, levels=rev(levels(high_conf_tab_ordered$ID)))
all_func_res$Panel[all_func_res$Panel == "Adrenal_Gland"]<-'GTeX Adrenal Gland'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellar_Hemisphere"]<-'GTeX Cerebellar Hemisphere'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellum"]<-'GTeX Cerebellum'
all_func_res$Panel[all_func_res$Panel == "Brain_Hypothalamus"]<-'GTeX Hypothalamus'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ_SPLICING"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "Pituitary"]<-'GTeX Pituitary'
all_func_res$Panel[all_func_res$Panel == "PsychENCODE"]<-'PsychENCODE DLPFC'
all_func_res$Panel[all_func_res$Panel == "Whole_Blood"]<-'GTeX'
all_func_res$Panel[all_func_res$Panel == "NTR.BLOOD.RNAARR"]<-'NTR'
all_func_res$Panel[all_func_res$Panel == "Thyroid"]<-'GTeX Thyroid'
all_func_res$Panel[all_func_res$Panel == "Brain_Caudate_basal_ganglia"]<-'GTeX Caudate basalganglia'
all_func_res$Panel[all_func_res$Panel == "YFS.BLOOD.RNAARR"]<-'YFS'
all_func_res$Panel[all_func_res$Panel == "Brain_Cortex"]<-'GTeX Cortex'
all_func_res$Panel[all_func_res$Panel == "Brain_Frontal_Cortex_BA9"]<-'GTeX Frontal Cortex BA9'
all_func_res$Panel[all_func_res$Panel == "Brain_Hippocampus"]<-'GTeX Hippocampus'
all_func_res$Panel[all_func_res$Panel == "Brain_Amygdala"]<-'GTeX Amygdala'
all_func_res$Panel[all_func_res$Panel == "Brain_Anterior_cingulate_cortex_BA24"]<-'GTeX Anterior cingulate cortex BA24'
all_func_res$Panel[all_func_res$Panel == "Brain_Substantia_nigra"]<-'GTeX Substantia nigra'
all_func_res$Panel[all_func_res$Panel == "Brain_Nucleus_accumbens_basal_ganglia"]<-'GTeX Nucleus accumbens basalganglia'
all_func_res$Panel[all_func_res$Panel == "Brain_Putamen_basal_ganglia"]<-'GTeX Nucleus Putamen basalganglia'
all_func_res$Panel[all_func_res$Panel == "eQTLGen_Blood"]<-'eQTLGen'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cerebellum"]<-'MetaBrain Cerebellum'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Basalganglia"]<-'MetaBrain Basalganglia'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cortex"]<-'MetaBrain Cortex'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Hippocampus"]<-'MetaBrain Hippocampus'
all_func_res$Panel<-factor(all_func_res$Panel, levels=c("GTeX Adrenal Gland" ,"GTeX Amygdala" ,"GTeX Anterior cingulate cortex BA24" ,"GTeX Caudate basalganglia" ,"GTeX Cerebellar Hemisphere" ,"GTeX Cerebellum" ,"GTeX Cortex" ,"GTeX Frontal Cortex BA9" ,"GTeX Hippocampus" ,"GTeX Hypothalamus", "GTeX Nucleus accumbens basalganglia","GTeX Nucleus Putamen basalganglia" ,"GTeX Pituitary",'GTeX Substantia nigra' ,"GTeX Thyroid","CMC DLPFC", "PsychENCODE DLPFC", "GTeX" ,"NTR" ,"YFS", "eQTLGen" ,'MetaBrain Basalganglia',"MetaBrain Cerebellum" ,"MetaBrain Cortex" , 'MetaBrain Hippocampus',"ROSMAP DLPFC" ,"Banner et al. DLPFC"))
# Create heatmap
library(ggplot2)
heatmap<-ggplot(data = all_func_res, aes(x = Panel, y = ID)) +
theme_bw() +
geom_tile(aes(fill = Z), width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='black', fill=NA, size=0.3, width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='green2', fill=NA, size=0.3, width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T & all_func_res$FOCUS == T,], aes(x = Panel, y = ID), colour='magenta2', fill=NA, size=0.3, width=0.95, height=0.95) +
scale_fill_gradientn(colours=c("dodgerblue2","white","red"), na.value = 'white',name = "Z-score") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
geom_text(aes(label=round(Z,1)), color="black", size=3) +
labs(title="High confidence genes across panels and methods") +
facet_wrap(~ Group , nrow=1, scales = "free_x")
group_siz<-NULL
for(i in c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr.")){
group_siz<-rbind(group_siz, data.frame(Group=i,
Size=length(unique(all_func_res$Panel[all_func_res$Group==i]))))
}
# Set minimum size to 3 to allow space for labels
group_siz$Size[group_siz$Size < 2]<-2
group_siz$Prop<-group_siz$Size/sum(group_siz$Size)
group_siz$Width<-4*group_siz$Prop
library(grid)
gt = ggplot_gtable(ggplot_build(heatmap))
for(i in 1:nrow(group_siz)){
gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]] = group_siz$Width[i]*gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]]
}
png('../../docs/figures/gene_con_func_heatmap.png', units = 'px', res=300, height=8000, width=3200)
grid.draw(gt)
a<-dev.off()
Show heatmap of high confidence associations across expression and protein datasets and methods
High-confidence genes
Conduct an overrepresentation test with PANTHER using the PANTHER API. The service returns results as JSON. The first step is querying the IDs for humans (the taxon) and the annotation datasets.
Show code
library(httr)
library(jsonlite)
# the PantherDB URL
panther_db <- "http://pantherdb.org"
# Get list of taxon IDs
supportedgenomes_response <- GET(modify_url(panther_db, path="/services/oai/pantherdb/supportedgenomes"))
# find the taxon ID for humans
supportedgenomes <- fromJSON(content(supportedgenomes_response, "text"))
genomes <- supportedgenomes$search$output$genomes$genome
human_taxon_id <- genomes$taxon_id[which(genomes$name == 'human')]
# get list of annotation datasets
supportedannotdatasets_response <- GET(modify_url(panther_db, path="services/oai/pantherdb/supportedannotdatasets"))
# find GO ids for biological process, molecular function, and cellular components
supportedannotdatasets <- fromJSON(content(supportedannotdatasets_response, "text"))
annotation_data_types <- supportedannotdatasets$search$annotation_data_sets$annotation_data_type
biological_process_id = annotation_data_types$id[which(annotation_data_types$label == "biological_process")]
cellular_component_id = annotation_data_types$id[which(annotation_data_types$label == "cellular_component")]
molecular_function_id = annotation_data_types$id[which(annotation_data_types$label == "molecular_function")]
The next step is to query the overrepresentation test.
Show code
# construct enrichment overrepresentation query from gene list
# and annotation ID
overrep_url <- function(gene_list, annot_data_set_id, url=panther_db, organism_id=human_taxon_id) {
modify_url(url,
path="/services/oai/pantherdb/enrich/overrep",
query=list(geneInputList=paste(gene_list, collapse=","),
organism=organism_id,
annotDataSet=annot_data_set_id,
enrichmentTestType="FISHER",
correction="FDR"))
}
# construct URL and query PANTHER. Parse out JSON response
overrep_query <- function(genes, annot_data_set_id, ...) {
# make query
overrep_response <- GET(overrep_url(genes, annot_data_set_id, ...))
# parse JSON
overrep <- fromJSON(content(overrep_response, "text"))
return(overrep)
}
high_conf_overrep_biol <- overrep_query(high_conf, biological_process_id)
high_conf_overrep_mol <- overrep_query(high_conf, molecular_function_id)
high_conf_overrep_cell<- overrep_query(high_conf, cellular_component_id)
Show code
# extract results table from the query
panther_format <- function(query) {
results <- query$results$result
results_labels <- results$term
results_terms <- cbind(results_labels,
results[,c('fold_enrichment', 'fdr',
'number_in_list', 'expected',
'number_in_reference', 'pValue')])
return(results_terms)
}
high_conf_overrep_biol_results <- panther_format(high_conf_overrep_biol)
high_conf_overrep_mol_results <- panther_format(high_conf_overrep_mol)
high_conf_overrep_cell_results <- panther_format(high_conf_overrep_cell)
Show Go: Biological Processes table
# filter for FDR
knitr::kable(high_conf_overrep_biol_results[which(high_conf_overrep_biol_results$fdr <= 0.05),], caption='GO: Biological Processes')
| id | label | fold_enrichment | fdr | number_in_list | expected | number_in_reference | pValue |
|---|---|---|---|---|---|---|---|
| GO:0007399 | nervous system development | 2.812943 | 0.0000000 | 58 | 20.6189713 | 2081 | 0.0000000 |
| GO:0007275 | multicellular organism development | 1.931022 | 0.0000090 | 79 | 40.9109719 | 4129 | 0.0000000 |
| GO:0048731 | system development | 1.981079 | 0.0000128 | 73 | 36.8486085 | 3719 | 0.0000000 |
| GO:0048856 | anatomical structure development | 1.745994 | 0.0000640 | 87 | 49.8283549 | 5029 | 0.0000000 |
| GO:0006928 | movement of cell or subcellular component | 2.659549 | 0.0000794 | 39 | 14.6641410 | 1480 | 0.0000000 |
| GO:0032502 | developmental process | 1.669706 | 0.0001268 | 92 | 55.0995192 | 5561 | 0.0000000 |
| GO:0032501 | multicellular organismal process | 1.559432 | 0.0004556 | 100 | 64.1258925 | 6472 | 0.0000002 |
| GO:0022008 | neurogenesis | 2.723282 | 0.0004159 | 33 | 12.1177328 | 1223 | 0.0000002 |
| GO:0065008 | regulation of biological quality | 1.812888 | 0.0010111 | 67 | 36.9575987 | 3730 | 0.0000006 |
| GO:0007417 | central nervous system development | 2.825941 | 0.0015375 | 28 | 9.9082034 | 1000 | 0.0000010 |
| GO:0048699 | generation of neurons | 2.707556 | 0.0020268 | 29 | 10.7107679 | 1081 | 0.0000014 |
| GO:0120035 | regulation of plasma membrane bounded cell projection organization | 3.402016 | 0.0020683 | 21 | 6.1728107 | 623 | 0.0000016 |
| GO:0099537 | trans-synaptic signaling | 4.085119 | 0.0019212 | 17 | 4.1614454 | 420 | 0.0000016 |
| GO:0050807 | regulation of synapse organization | 5.794821 | 0.0022467 | 12 | 2.0708145 | 209 | 0.0000020 |
| GO:0031344 | regulation of cell projection organization | 3.316832 | 0.0024349 | 21 | 6.3313420 | 639 | 0.0000023 |
| GO:0050803 | regulation of synapse structure or activity | 5.633105 | 0.0025969 | 12 | 2.1302637 | 215 | 0.0000027 |
| GO:0099536 | synaptic signaling | 3.812778 | 0.0035837 | 17 | 4.4586915 | 450 | 0.0000039 |
| GO:0032879 | regulation of localization | 1.900136 | 0.0037639 | 52 | 27.3664578 | 2762 | 0.0000043 |
| GO:0048812 | neuron projection morphogenesis | 3.713745 | 0.0044858 | 17 | 4.5775900 | 462 | 0.0000054 |
| GO:0009653 | anatomical structure morphogenesis | 2.032387 | 0.0046715 | 44 | 21.6494244 | 2185 | 0.0000060 |
| GO:0120039 | plasma membrane bounded cell projection morphogenesis | 3.681867 | 0.0045280 | 17 | 4.6172228 | 466 | 0.0000061 |
| GO:0000902 | cell morphogenesis | 3.089586 | 0.0048074 | 21 | 6.7970275 | 686 | 0.0000067 |
| GO:0048858 | cell projection morphogenesis | 3.650532 | 0.0046069 | 17 | 4.6568556 | 470 | 0.0000068 |
| GO:0007420 | brain development | 2.933134 | 0.0058272 | 22 | 7.5005100 | 757 | 0.0000089 |
| GO:0010975 | regulation of neuron projection development | 3.738017 | 0.0060076 | 16 | 4.2803439 | 432 | 0.0000096 |
| GO:0032990 | cell part morphogenesis | 3.508691 | 0.0067072 | 17 | 4.8451115 | 489 | 0.0000111 |
| GO:0031346 | positive regulation of cell projection organization | 4.155796 | 0.0065795 | 14 | 3.3687892 | 340 | 0.0000113 |
| GO:0050808 | synapse organization | 4.462735 | 0.0063869 | 13 | 2.9130118 | 294 | 0.0000114 |
| GO:0030182 | neuron differentiation | 2.577690 | 0.0066211 | 26 | 10.0865511 | 1018 | 0.0000123 |
| GO:0048519 | negative regulation of biological process | 1.545961 | 0.0064905 | 81 | 52.3945796 | 5288 | 0.0000124 |
| GO:0034765 | regulation of ion transmembrane transport | 3.466162 | 0.0065496 | 17 | 4.9045607 | 495 | 0.0000130 |
| GO:0007416 | synapse assembly | 7.838949 | 0.0068179 | 8 | 1.0205450 | 103 | 0.0000139 |
| GO:0007268 | chemical synaptic transmission | 3.765913 | 0.0080043 | 15 | 3.9830978 | 402 | 0.0000169 |
| GO:0098916 | anterograde trans-synaptic signaling | 3.765913 | 0.0077689 | 15 | 3.9830978 | 402 | 0.0000169 |
| GO:0098742 | cell-cell adhesion via plasma-membrane adhesion molecules | 4.587567 | 0.0086027 | 12 | 2.6157657 | 264 | 0.0000192 |
| GO:0050890 | cognition | 4.232400 | 0.0084954 | 13 | 3.0715431 | 310 | 0.0000195 |
| GO:0060322 | head development | 2.761670 | 0.0093246 | 22 | 7.9661955 | 804 | 0.0000220 |
| GO:0040011 | locomotion | 2.352787 | 0.0091546 | 29 | 12.3258050 | 1244 | 0.0000222 |
| GO:0043269 | regulation of ion transport | 2.938180 | 0.0091558 | 20 | 6.8069357 | 687 | 0.0000228 |
| GO:0048513 | animal organ development | 1.746133 | 0.0098012 | 55 | 31.4981786 | 3179 | 0.0000250 |
| GO:0034762 | regulation of transmembrane transport | 3.143039 | 0.0097187 | 18 | 5.7269416 | 578 | 0.0000254 |
| GO:0007610 | behavior | 3.121437 | 0.0103579 | 18 | 5.7665744 | 582 | 0.0000278 |
| GO:0010976 | positive regulation of neuron projection development | 6.055588 | 0.0102893 | 9 | 1.4862305 | 150 | 0.0000282 |
| GO:0050804 | modulation of chemical synaptic transmission | 3.570512 | 0.0108893 | 15 | 4.2010782 | 424 | 0.0000306 |
| GO:0048523 | negative regulation of cellular process | 1.555267 | 0.0107186 | 75 | 48.2232260 | 4867 | 0.0000308 |
| GO:0099177 | regulation of trans-synaptic signaling | 3.562111 | 0.0106914 | 15 | 4.2109864 | 425 | 0.0000314 |
| GO:0098609 | cell-cell adhesion | 3.177315 | 0.0125648 | 17 | 5.3504298 | 540 | 0.0000377 |
| GO:0032409 | regulation of transporter activity | 4.133507 | 0.0166311 | 12 | 2.9031036 | 293 | 0.0000509 |
| GO:0001764 | neuron migration | 6.408030 | 0.0173416 | 8 | 1.2484336 | 126 | 0.0000542 |
| GO:0051252 | regulation of RNA metabolic process | 1.640425 | 0.0170847 | 61 | 37.1854874 | 3753 | 0.0000545 |
| GO:0007156 | homophilic cell adhesion via plasma membrane adhesion molecules | 5.406775 | 0.0200490 | 9 | 1.6645782 | 168 | 0.0000652 |
| GO:1900244 | positive regulation of synaptic vesicle endocytosis | 50.463235 | 0.0225552 | 3 | 0.0594492 | 6 | 0.0000748 |
| GO:0048666 | neuron development | 2.587858 | 0.0249534 | 21 | 8.1148186 | 819 | 0.0000844 |
| GO:0010468 | regulation of gene expression | 1.524443 | 0.0252772 | 73 | 47.8863471 | 4833 | 0.0000871 |
| GO:0019219 | regulation of nucleobase-containing compound metabolic process | 1.591351 | 0.0255573 | 64 | 40.2173976 | 4059 | 0.0000897 |
| GO:0032989 | cellular component morphogenesis | 2.948024 | 0.0257494 | 17 | 5.7665744 | 582 | 0.0000920 |
| GO:0032412 | regulation of ion transmembrane transporter activity | 4.173651 | 0.0265634 | 11 | 2.6355821 | 266 | 0.0000966 |
| GO:0007611 | learning or memory | 4.158019 | 0.0269512 | 11 | 2.6454903 | 267 | 0.0000997 |
| GO:2001257 | regulation of cation channel activity | 5.046323 | 0.0286563 | 9 | 1.7834766 | 180 | 0.0001079 |
| GO:0048667 | cell morphogenesis involved in neuron differentiation | 3.348272 | 0.0284968 | 14 | 4.1812618 | 422 | 0.0001091 |
| GO:0050793 | regulation of developmental process | 1.802990 | 0.0323324 | 44 | 24.4039050 | 2463 | 0.0001258 |
| GO:0051179 | localization | 1.487379 | 0.0318546 | 76 | 51.0966050 | 5157 | 0.0001260 |
| GO:0022898 | regulation of transmembrane transporter activity | 4.022432 | 0.0328453 | 11 | 2.7346641 | 276 | 0.0001320 |
| GO:0008088 | axo-dendritic transport | 8.183227 | 0.0333436 | 6 | 0.7332071 | 74 | 0.0001362 |
| GO:0034330 | cell junction organization | 3.108618 | 0.0331094 | 15 | 4.8252951 | 487 | 0.0001373 |
| GO:1903423 | positive regulation of synaptic vesicle recycling | 37.847426 | 0.0344040 | 3 | 0.0792656 | 8 | 0.0001449 |
| GO:0008150 | biological_process | 1.096351 | 0.0356215 | 194 | 176.9506047 | 17859 | 0.0001523 |
| NA | UNCLASSIFIED | 0.369694 | 0.0350976 | 10 | 27.0493953 | 2730 | 0.0001523 |
| GO:0007612 | learning | 5.382745 | 0.0389315 | 8 | 1.4862305 | 150 | 0.0001714 |
| GO:0051049 | regulation of transport | 1.969862 | 0.0448064 | 34 | 17.2600903 | 1742 | 0.0002001 |
| GO:2000311 | regulation of AMPA receptor activity | 15.527149 | 0.0445554 | 4 | 0.2576133 | 26 | 0.0002019 |
| GO:0030154 | cell differentiation | 1.624036 | 0.0451396 | 55 | 33.8662393 | 3418 | 0.0002074 |
| GO:0048869 | cellular developmental process | 1.613650 | 0.0480978 | 55 | 34.0842197 | 3440 | 0.0002240 |
Show Go: Molecular functions table
# filter for FDR
knitr::kable(high_conf_overrep_mol_results[which(high_conf_overrep_mol_results$fdr <= 0.05),], caption='GO: Molecular Functions')
| id | label | fold_enrichment | fdr | number_in_list | expected | number_in_reference | pValue |
|---|
Show Go: Cellular Components table
# filter for FDR
knitr::kable(high_conf_overrep_cell_results[which(high_conf_overrep_cell_results$fdr <= 0.05),], caption='GO: Cellular Components')
| id | label | fold_enrichment | fdr | number_in_list | expected | number_in_reference | pValue |
|---|---|---|---|---|---|---|---|
| GO:0043005 | neuron projection | 3.694234 | 0.0000000 | 50 | 13.5346059 | 1366 | 0.0000000 |
| GO:0045202 | synapse | 3.607201 | 0.0000000 | 48 | 13.3067172 | 1343 | 0.0000000 |
| GO:0036477 | somatodendritic compartment | 4.180386 | 0.0000000 | 35 | 8.3724319 | 845 | 0.0000000 |
| GO:0030425 | dendrite | 4.891428 | 0.0000000 | 30 | 6.1331779 | 619 | 0.0000000 |
| GO:0097447 | dendritic tree | 4.875675 | 0.0000000 | 30 | 6.1529943 | 621 | 0.0000000 |
| GO:0098794 | postsynapse | 4.557970 | 0.0000000 | 28 | 6.1430861 | 620 | 0.0000000 |
| GO:0030054 | cell junction | 2.575628 | 0.0000000 | 54 | 20.9657584 | 2116 | 0.0000000 |
| GO:0030424 | axon | 4.429375 | 0.0000000 | 28 | 6.3214338 | 638 | 0.0000000 |
| GO:0120025 | plasma membrane bounded cell projection | 2.479212 | 0.0000001 | 55 | 22.1844674 | 2239 | 0.0000000 |
| GO:0042995 | cell projection | 2.409157 | 0.0000001 | 56 | 23.2446452 | 2346 | 0.0000000 |
| GO:0098984 | neuron to neuron synapse | 5.447736 | 0.0000009 | 19 | 3.4876876 | 352 | 0.0000000 |
| GO:0030426 | growth cone | 7.951783 | 0.0000040 | 13 | 1.6348536 | 165 | 0.0000000 |
| GO:0030427 | site of polarized growth | 7.672773 | 0.0000055 | 13 | 1.6943028 | 171 | 0.0000000 |
| GO:0014069 | postsynaptic density | 5.328416 | 0.0000066 | 17 | 3.1904415 | 322 | 0.0000000 |
| GO:0032279 | asymmetric synapse | 5.246942 | 0.0000076 | 17 | 3.2399825 | 327 | 0.0000001 |
| GO:0098793 | presynapse | 4.099528 | 0.0000107 | 21 | 5.1225412 | 517 | 0.0000001 |
| GO:0099572 | postsynaptic specialization | 4.973188 | 0.0000139 | 17 | 3.4183302 | 345 | 0.0000001 |
| GO:0098978 | glutamatergic synapse | 4.893405 | 0.0000385 | 16 | 3.2697071 | 330 | 0.0000003 |
| GO:0150034 | distal axon | 5.119459 | 0.0001219 | 14 | 2.7346641 | 276 | 0.0000011 |
| GO:0097060 | synaptic membrane | 4.260748 | 0.0001981 | 16 | 3.7552091 | 379 | 0.0000019 |
| GO:0043197 | dendritic spine | 6.417290 | 0.0002074 | 11 | 1.7141192 | 173 | 0.0000021 |
| GO:0044309 | neuron spine | 6.343950 | 0.0002199 | 11 | 1.7339356 | 175 | 0.0000024 |
| GO:0031224 | intrinsic component of membrane | 1.524313 | 0.0005091 | 90 | 59.0429841 | 5959 | 0.0000057 |
| GO:0045211 | postsynaptic membrane | 4.771070 | 0.0004890 | 13 | 2.7247559 | 275 | 0.0000058 |
| GO:0098839 | postsynaptic density membrane | 8.872657 | 0.0004872 | 8 | 0.9016465 | 91 | 0.0000060 |
| GO:0099240 | intrinsic component of synaptic membrane | 6.154053 | 0.0006943 | 10 | 1.6249454 | 164 | 0.0000088 |
| GO:0099699 | integral component of synaptic membrane | 5.975909 | 0.0023552 | 9 | 1.5060469 | 152 | 0.0000312 |
| GO:0099634 | postsynaptic specialization membrane | 6.842473 | 0.0025466 | 8 | 1.1691680 | 118 | 0.0000349 |
| GO:0043025 | neuronal cell body | 3.288846 | 0.0030381 | 16 | 4.8649279 | 491 | 0.0000432 |
| GO:0098797 | plasma membrane protein complex | 2.791880 | 0.0031135 | 20 | 7.1636311 | 723 | 0.0000458 |
| GO:0016021 | integral component of membrane | 1.461694 | 0.0055241 | 84 | 57.4675798 | 5800 | 0.0000839 |
| GO:0005891 | voltage-gated calcium channel complex | 11.468917 | 0.0074120 | 5 | 0.4359610 | 44 | 0.0001162 |
| GO:0044295 | axonal growth cone | 16.821078 | 0.0094715 | 4 | 0.2377969 | 24 | 0.0001531 |
| GO:0098590 | plasma membrane region | 2.188767 | 0.0098278 | 27 | 12.3357132 | 1245 | 0.0001637 |
| GO:0044297 | cell body | 2.888772 | 0.0108281 | 16 | 5.5386857 | 559 | 0.0001857 |
| GO:0099061 | integral component of postsynaptic density membrane | 9.704468 | 0.0135772 | 5 | 0.5152266 | 52 | 0.0002395 |
| GO:0099055 | integral component of postsynaptic membrane | 5.936851 | 0.0137258 | 7 | 1.1790762 | 119 | 0.0002488 |
| GO:0034702 | ion channel complex | 3.713014 | 0.0138248 | 11 | 2.9625528 | 299 | 0.0002574 |
| GO:0099146 | intrinsic component of postsynaptic density membrane | 9.011292 | 0.0172514 | 5 | 0.5548594 | 56 | 0.0003296 |
| GO:0098936 | intrinsic component of postsynaptic membrane | 5.651882 | 0.0168688 | 7 | 1.2385254 | 125 | 0.0003306 |
| GO:0120111 | neuron projection cytoplasm | 6.728431 | 0.0183688 | 6 | 0.8917383 | 90 | 0.0003690 |
| GO:0005798 | Golgi-associated vesicle | 6.582161 | 0.0200335 | 6 | 0.9115547 | 92 | 0.0004123 |
| GO:0034703 | cation channel complex | 4.091614 | 0.0227101 | 9 | 2.1996212 | 222 | 0.0004785 |
| GO:0005886 | plasma membrane | 1.389752 | 0.0286206 | 82 | 59.0033513 | 5955 | 0.0006170 |
| GO:0043198 | dendritic shaft | 11.214052 | 0.0282800 | 4 | 0.3566953 | 36 | 0.0006235 |
| GO:0000785 | chromatin | 2.059724 | 0.0279234 | 26 | 12.6230511 | 1274 | 0.0006293 |
| GO:0005794 | Golgi apparatus | 1.892753 | 0.0288595 | 31 | 16.3782602 | 1653 | 0.0006646 |
| GO:0034704 | calcium channel complex | 7.209034 | 0.0364030 | 5 | 0.6935742 | 70 | 0.0008561 |
| GO:0071944 | cell periphery | 1.348613 | 0.0451782 | 86 | 63.7691971 | 6436 | 0.0010846 |
| GO:0098982 | GABA-ergic synapse | 6.728431 | 0.0467931 | 5 | 0.7431153 | 75 | 0.0011463 |